library(tidyverse)
library(data.table)
library(mice)
library(skimr)
library(corrplot)
library(cowplot)
# Load the semicolon-delimited bank marketing data from the project repository.
# stringsAsFactors is set explicitly: from R 4.0.0 read.csv() keeps text columns
# as character by default, while the steps below depend on factor columns
# (factor-vs-numeric plot() calls and colouring pairs() by Subscription).
bankraw <- read.csv(
  "https://raw.githubusercontent.com/JaclynCoate/6372_Project_2/master/Data/bank-additional-full.csv",
  header = TRUE,
  sep = ";",
  strip.white = TRUE,
  stringsAsFactors = TRUE
)
# Inspect column types and a few leading values.
str(bankraw)
## 'data.frame': 41188 obs. of 21 variables:
## $ age : int 56 57 37 40 56 45 59 41 24 25 ...
## $ job : Factor w/ 12 levels "admin.","blue-collar",..: 4 8 8 1 8 8 1 2 10 8 ...
## $ marital : Factor w/ 4 levels "divorced","married",..: 2 2 2 2 2 2 2 2 3 3 ...
## $ education : Factor w/ 8 levels "basic.4y","basic.6y",..: 1 4 4 2 4 3 6 8 6 4 ...
## $ default : Factor w/ 3 levels "no","unknown",..: 1 2 1 1 1 2 1 2 1 1 ...
## $ housing : Factor w/ 3 levels "no","unknown",..: 1 1 3 1 1 1 1 1 3 3 ...
## $ loan : Factor w/ 3 levels "no","unknown",..: 1 1 1 1 3 1 1 1 1 1 ...
## $ contact : Factor w/ 2 levels "cellular","telephone": 2 2 2 2 2 2 2 2 2 2 ...
## $ month : Factor w/ 10 levels "apr","aug","dec",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ day_of_week : Factor w/ 5 levels "fri","mon","thu",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ duration : int 261 149 226 151 307 198 139 217 380 50 ...
## $ campaign : int 1 1 1 1 1 1 1 1 1 1 ...
## $ pdays : int 999 999 999 999 999 999 999 999 999 999 ...
## $ previous : int 0 0 0 0 0 0 0 0 0 0 ...
## $ poutcome : Factor w/ 3 levels "failure","nonexistent",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ emp.var.rate : num 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 ...
## $ cons.price.idx: num 94 94 94 94 94 ...
## $ cons.conf.idx : num -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 ...
## $ euribor3m : num 4.86 4.86 4.86 4.86 4.86 ...
## $ nr.employed : num 5191 5191 5191 5191 5191 ...
## $ y : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
# Show the first rows of the raw data.
head(bankraw)
## age job marital education default housing loan contact month
## 1 56 housemaid married basic.4y no no no telephone may
## 2 57 services married high.school unknown no no telephone may
## 3 37 services married high.school no yes no telephone may
## 4 40 admin. married basic.6y no no no telephone may
## 5 56 services married high.school no no yes telephone may
## 6 45 services married basic.9y unknown no no telephone may
## day_of_week duration campaign pdays previous poutcome emp.var.rate
## 1 mon 261 1 999 0 nonexistent 1.1
## 2 mon 149 1 999 0 nonexistent 1.1
## 3 mon 226 1 999 0 nonexistent 1.1
## 4 mon 151 1 999 0 nonexistent 1.1
## 5 mon 307 1 999 0 nonexistent 1.1
## 6 mon 198 1 999 0 nonexistent 1.1
## cons.price.idx cons.conf.idx euribor3m nr.employed y
## 1 93.994 -36.4 4.857 5191 no
## 2 93.994 -36.4 4.857 5191 no
## 3 93.994 -36.4 4.857 5191 no
## 4 93.994 -36.4 4.857 5191 no
## 5 93.994 -36.4 4.857 5191 no
## 6 93.994 -36.4 4.857 5191 no
# Rename the response column from y to Subscription; setnames() changes
# bankraw directly, so no re-assignment is needed. Then re-check the structure.
setnames(bankraw, old = "y", new = "Subscription")
str(bankraw)
## 'data.frame': 41188 obs. of 21 variables:
## $ age : int 56 57 37 40 56 45 59 41 24 25 ...
## $ job : Factor w/ 12 levels "admin.","blue-collar",..: 4 8 8 1 8 8 1 2 10 8 ...
## $ marital : Factor w/ 4 levels "divorced","married",..: 2 2 2 2 2 2 2 2 3 3 ...
## $ education : Factor w/ 8 levels "basic.4y","basic.6y",..: 1 4 4 2 4 3 6 8 6 4 ...
## $ default : Factor w/ 3 levels "no","unknown",..: 1 2 1 1 1 2 1 2 1 1 ...
## $ housing : Factor w/ 3 levels "no","unknown",..: 1 1 3 1 1 1 1 1 3 3 ...
## $ loan : Factor w/ 3 levels "no","unknown",..: 1 1 1 1 3 1 1 1 1 1 ...
## $ contact : Factor w/ 2 levels "cellular","telephone": 2 2 2 2 2 2 2 2 2 2 ...
## $ month : Factor w/ 10 levels "apr","aug","dec",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ day_of_week : Factor w/ 5 levels "fri","mon","thu",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ duration : int 261 149 226 151 307 198 139 217 380 50 ...
## $ campaign : int 1 1 1 1 1 1 1 1 1 1 ...
## $ pdays : int 999 999 999 999 999 999 999 999 999 999 ...
## $ previous : int 0 0 0 0 0 0 0 0 0 0 ...
## $ poutcome : Factor w/ 3 levels "failure","nonexistent",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ emp.var.rate : num 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 ...
## $ cons.price.idx: num 94 94 94 94 94 ...
## $ cons.conf.idx : num -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 -36.4 ...
## $ euribor3m : num 4.86 4.86 4.86 4.86 4.86 ...
## $ nr.employed : num 5191 5191 5191 5191 5191 ...
## $ Subscription : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
# Drop `duration`, judged logically irrelevant for this analysis, and preview
# the remaining columns.
bankraw2 <- bankraw %>%
  select(-duration)
head(bankraw2)
## age job marital education default housing loan contact month
## 1 56 housemaid married basic.4y no no no telephone may
## 2 57 services married high.school unknown no no telephone may
## 3 37 services married high.school no yes no telephone may
## 4 40 admin. married basic.6y no no no telephone may
## 5 56 services married high.school no no yes telephone may
## 6 45 services married basic.9y unknown no no telephone may
## day_of_week campaign pdays previous poutcome emp.var.rate
## 1 mon 1 999 0 nonexistent 1.1
## 2 mon 1 999 0 nonexistent 1.1
## 3 mon 1 999 0 nonexistent 1.1
## 4 mon 1 999 0 nonexistent 1.1
## 5 mon 1 999 0 nonexistent 1.1
## 6 mon 1 999 0 nonexistent 1.1
## cons.price.idx cons.conf.idx euribor3m nr.employed Subscription
## 1 93.994 -36.4 4.857 5191 no
## 2 93.994 -36.4 4.857 5191 no
## 3 93.994 -36.4 4.857 5191 no
## 4 93.994 -36.4 4.857 5191 no
## 5 93.994 -36.4 4.857 5191 no
## 6 93.994 -36.4 4.857 5191 no
# Open bankraw2 in the data viewer; invisible() keeps the returned value from
# being printed.
invisible(view(bankraw2))
# Check for missing values: md.pattern() tabulates the missing-data patterns.
md.pattern(bankraw2)
## /\ /\
## { `---' }
## { O O }
## ==> V <== No need for mice. This data set is completely observed.
## \ \|/ /
## `-----'
## age job marital education default housing loan contact month
## 41188 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0
## day_of_week campaign pdays previous poutcome emp.var.rate
## 41188 1 1 1 1 1 1
## 0 0 0 0 0 0
## cons.price.idx cons.conf.idx euribor3m nr.employed Subscription
## 41188 1 1 1 1 1 0
## 0 0 0 0 0 0
# The missing-data pattern above shows no NAs.
# Summarise every column, grouped by variable type.
skim(bankraw2)
## Skim summary statistics
## n obs: 41188
## n variables: 20
##
## ── Variable type:factor ─────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n n_unique
## contact 0 41188 41188 2
## day_of_week 0 41188 41188 5
## default 0 41188 41188 3
## education 0 41188 41188 8
## housing 0 41188 41188 3
## job 0 41188 41188 12
## loan 0 41188 41188 3
## marital 0 41188 41188 4
## month 0 41188 41188 10
## poutcome 0 41188 41188 3
## Subscription 0 41188 41188 2
## top_counts ordered
## cel: 26144, tel: 15044, NA: 0 FALSE
## thu: 8623, mon: 8514, wed: 8134, tue: 8090 FALSE
## no: 32588, unk: 8597, yes: 3, NA: 0 FALSE
## uni: 12168, hig: 9515, bas: 6045, pro: 5243 FALSE
## yes: 21576, no: 18622, unk: 990, NA: 0 FALSE
## adm: 10422, blu: 9254, tec: 6743, ser: 3969 FALSE
## no: 33950, yes: 6248, unk: 990, NA: 0 FALSE
## mar: 24928, sin: 11568, div: 4612, unk: 80 FALSE
## may: 13769, jul: 7174, aug: 6178, jun: 5318 FALSE
## non: 35563, fai: 4252, suc: 1373, NA: 0 FALSE
## no: 36548, yes: 4640, NA: 0 FALSE
##
## ── Variable type:integer ────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50 p75 p100
## age 0 41188 41188 40.02 10.42 17 32 38 47 98
## campaign 0 41188 41188 2.57 2.77 1 1 2 3 56
## pdays 0 41188 41188 962.48 186.91 0 999 999 999 999
## previous 0 41188 41188 0.17 0.49 0 0 0 0 7
## hist
## ▂▇▆▃▁▁▁▁
## ▇▁▁▁▁▁▁▁
## ▁▁▁▁▁▁▁▇
## ▇▁▁▁▁▁▁▁
##
## ── Variable type:numeric ────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25
## cons.conf.idx 0 41188 41188 -40.5 4.63 -50.8 -42.7
## cons.price.idx 0 41188 41188 93.58 0.58 92.2 93.08
## emp.var.rate 0 41188 41188 0.082 1.57 -3.4 -1.8
## euribor3m 0 41188 41188 3.62 1.73 0.63 1.34
## nr.employed 0 41188 41188 5167.04 72.25 4963.6 5099.1
## p50 p75 p100 hist
## -41.8 -36.4 -26.9 ▁▅▆▃▇▁▁▁
## 93.75 93.99 94.77 ▁▁▅▅▁▇▁▂
## 1.1 1.4 1.4 ▁▁▃▁▁▁▁▇
## 4.86 4.96 5.04 ▂▃▁▁▁▁▁▇
## 5191 5228.1 5228.1 ▁▁▁▁▃▁▃▇
# Box plots of each numeric predictor split by Subscription: plot() with a
# factor x and a numeric y draws one box per factor level. The heading has to
# be passed as `main`; `title` is not an argument plot() recognises, so it
# draws no heading. Axis labels and headings name the variable actually plotted.
plot(
  bankraw2$Subscription, bankraw2$age,
  xlab = "Subscription", ylab = "Age",
  main = "Subscription v Age", col = c(82, 107)
)
# The initial box plot shows outliers that hide the actual shape of the boxes.
plot(
  bankraw2$Subscription, bankraw2$campaign,
  xlab = "Subscription", ylab = "Campaign",
  main = "Subscription v Campaign", col = c(82, 107)
)
# Keep only rows with campaign <= 6 to see the boxes more clearly.
campOutliers <- bankraw2[bankraw2$campaign <= 6, ]
plot(
  campOutliers$Subscription, campOutliers$campaign,
  xlab = "Subscription", ylab = "Campaign <= 6",
  main = "Subscription v Campaign <= 6", col = c(82, 107)
)
# Upon further review, campaign will be recoded as a categorical
# 'Contacted' / 'Not Contacted' variable and evaluated against Subscription in
# objective 1; for the EDA it stays numeric.
plot(
  bankraw2$Subscription, bankraw2$pdays,
  xlab = "Subscription", ylab = "pdays",
  main = "Subscription v pdays", col = c(82, 107)
)
# To evaluate pdays as numeric, drop the rows where it equals 999 (treated
# here as outliers) and re-plot.
pdaysOutliers <- bankraw2[bankraw2$pdays != 999, ]
plot(
  pdaysOutliers$Subscription, pdaysOutliers$pdays,
  xlab = "Subscription", ylab = "pdays",
  main = "Subscription v pdays (999 excluded)", col = c(82, 107)
)
plot(
  bankraw2$Subscription, bankraw2$previous,
  xlab = "Subscription", ylab = "previous",
  main = "Subscription v previous", col = c(82, 107)
)
plot(
  bankraw2$Subscription, bankraw2$cons.conf.idx,
  xlab = "Subscription", ylab = "cons.conf.idx",
  main = "Subscription v cons.conf.idx", col = c(82, 107)
)
plot(
  bankraw2$Subscription, bankraw2$cons.price.idx,
  xlab = "Subscription", ylab = "cons.price.idx",
  main = "Subscription v cons.price.idx", col = c(82, 107)
)
plot(
  bankraw2$Subscription, bankraw2$emp.var.rate,
  xlab = "Subscription", ylab = "emp.var.rate",
  main = "Subscription v emp.var.rate", col = c(82, 107)
)
plot(
  bankraw2$Subscription, bankraw2$euribor3m,
  xlab = "Subscription", ylab = "euribor3m",
  main = "Subscription v euribor3m", col = c(82, 107)
)
plot(
  bankraw2$Subscription, bankraw2$nr.employed,
  xlab = "Subscription", ylab = "nr.employed",
  main = "Subscription v nr.employed", col = c(82, 107)
)
# Overlaid density curves of one column, filled by a grouping column.
#   df          data frame holding both columns
#   explanatory name (string) of the column mapped to the x axis
#   response    name (string) of the column mapped to the fill colour
# Returns a ggplot object; print it to draw the plot.
densityPlots <- function(df, explanatory, response) {
  # Columns are looked up by name through the .data pronoun; this replaces
  # aes_string(), which ggplot2 has deprecated.
  ggplot(df, aes(x = .data[[explanatory]], fill = .data[[response]])) +
    geom_density(alpha = 0.5) +
    # Label explicitly so the axis and legend show the bare column names.
    labs(x = explanatory, fill = response)
}
# Build one density plot per numeric column of bankraw2, filled by
# Subscription, then print every plot in column order.
densityPlotsList <- bankraw2 %>%
  keep(is.numeric) %>%
  colnames() %>%
  map(function(numericCol) densityPlots(bankraw2, numericCol, "Subscription"))
walk(densityPlotsList, print)
#Testing to see if function above works properly
#densityPlots(bankraw2, "age", "Subscription")
# Remove the age column from the working data, then re-run the summary.
bankraw2 <- bankraw2 %>%
  select(-age)
skim(bankraw2)
## Skim summary statistics
## n obs: 41188
## n variables: 19
##
## ── Variable type:factor ─────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n n_unique
## contact 0 41188 41188 2
## day_of_week 0 41188 41188 5
## default 0 41188 41188 3
## education 0 41188 41188 8
## housing 0 41188 41188 3
## job 0 41188 41188 12
## loan 0 41188 41188 3
## marital 0 41188 41188 4
## month 0 41188 41188 10
## poutcome 0 41188 41188 3
## Subscription 0 41188 41188 2
## top_counts ordered
## cel: 26144, tel: 15044, NA: 0 FALSE
## thu: 8623, mon: 8514, wed: 8134, tue: 8090 FALSE
## no: 32588, unk: 8597, yes: 3, NA: 0 FALSE
## uni: 12168, hig: 9515, bas: 6045, pro: 5243 FALSE
## yes: 21576, no: 18622, unk: 990, NA: 0 FALSE
## adm: 10422, blu: 9254, tec: 6743, ser: 3969 FALSE
## no: 33950, yes: 6248, unk: 990, NA: 0 FALSE
## mar: 24928, sin: 11568, div: 4612, unk: 80 FALSE
## may: 13769, jul: 7174, aug: 6178, jun: 5318 FALSE
## non: 35563, fai: 4252, suc: 1373, NA: 0 FALSE
## no: 36548, yes: 4640, NA: 0 FALSE
##
## ── Variable type:integer ────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50 p75 p100
## campaign 0 41188 41188 2.57 2.77 1 1 2 3 56
## pdays 0 41188 41188 962.48 186.91 0 999 999 999 999
## previous 0 41188 41188 0.17 0.49 0 0 0 0 7
## hist
## ▇▁▁▁▁▁▁▁
## ▁▁▁▁▁▁▁▇
## ▇▁▁▁▁▁▁▁
##
## ── Variable type:numeric ────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25
## cons.conf.idx 0 41188 41188 -40.5 4.63 -50.8 -42.7
## cons.price.idx 0 41188 41188 93.58 0.58 92.2 93.08
## emp.var.rate 0 41188 41188 0.082 1.57 -3.4 -1.8
## euribor3m 0 41188 41188 3.62 1.73 0.63 1.34
## nr.employed 0 41188 41188 5167.04 72.25 4963.6 5099.1
## p50 p75 p100 hist
## -41.8 -36.4 -26.9 ▁▅▆▃▇▁▁▁
## 93.75 93.99 94.77 ▁▁▅▅▁▇▁▂
## 1.1 1.4 1.4 ▁▁▃▁▁▁▁▇
## 4.86 4.96 5.04 ▂▃▁▁▁▁▁▇
## 5191 5228.1 5228.1 ▁▁▁▁▃▁▃▇
When highlighting the yes versus no result for signing up, we cannot see a clear separation of any kind. This leads us away from using principal component analysis for variable selection.
The pairs plot below shows the Yes and No results of the dependent variable in each panel. We can observe that there is no clear separation, and therefore PCA would not be a good variable reduction tool moving forward. So we continue with a standard EDA.
invisible(view(bankraw2))
# Keep only the numeric columns and draw a scatterplot matrix coloured by the
# response, to judge whether the two outcomes separate well enough for PCA to
# be useful. The piped data frame is the first argument of pairs(); only the
# colour needs to be supplied (no stray empty argument).
bankraw2 %>%
  keep(is.numeric) %>%
  pairs(col = bankraw2$Subscription)
# Correlation matrix of the numeric columns, drawn as the upper triangle with
# square glyphs, hierarchical-cluster ordering and the coefficients printed in
# white.
bankraw2 %>%
  keep(is.numeric) %>%
  cor() %>%
  corrplot(
    method = "square",
    type = "upper",
    order = "hclust",
    addCoef.col = "white",
    number.digits = 2,
    number.cex = 0.5,
    tl.srt = 45,
    tl.cex = 0.8
  )
# Drop pdays, euribor3m, nr.employed and emp.var.rate, then summarise the
# reduced data set.
# NOTE(review): the removal appears to be motivated by the high correlations
# in the plot above - confirm the reason for dropping pdays.
bank3 <- bankraw2 %>%
  select(-pdays, -euribor3m, -nr.employed, -emp.var.rate)
skim(bank3)
## Skim summary statistics
## n obs: 41188
## n variables: 15
##
## ── Variable type:factor ─────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n n_unique
## contact 0 41188 41188 2
## day_of_week 0 41188 41188 5
## default 0 41188 41188 3
## education 0 41188 41188 8
## housing 0 41188 41188 3
## job 0 41188 41188 12
## loan 0 41188 41188 3
## marital 0 41188 41188 4
## month 0 41188 41188 10
## poutcome 0 41188 41188 3
## Subscription 0 41188 41188 2
## top_counts ordered
## cel: 26144, tel: 15044, NA: 0 FALSE
## thu: 8623, mon: 8514, wed: 8134, tue: 8090 FALSE
## no: 32588, unk: 8597, yes: 3, NA: 0 FALSE
## uni: 12168, hig: 9515, bas: 6045, pro: 5243 FALSE
## yes: 21576, no: 18622, unk: 990, NA: 0 FALSE
## adm: 10422, blu: 9254, tec: 6743, ser: 3969 FALSE
## no: 33950, yes: 6248, unk: 990, NA: 0 FALSE
## mar: 24928, sin: 11568, div: 4612, unk: 80 FALSE
## may: 13769, jul: 7174, aug: 6178, jun: 5318 FALSE
## non: 35563, fai: 4252, suc: 1373, NA: 0 FALSE
## no: 36548, yes: 4640, NA: 0 FALSE
##
## ── Variable type:integer ────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50 p75 p100 hist
## campaign 0 41188 41188 2.57 2.77 1 1 2 3 56 ▇▁▁▁▁▁▁▁
## previous 0 41188 41188 0.17 0.49 0 0 0 0 7 ▇▁▁▁▁▁▁▁
##
## ── Variable type:numeric ────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50
## cons.conf.idx 0 41188 41188 -40.5 4.63 -50.8 -42.7 -41.8
## cons.price.idx 0 41188 41188 93.58 0.58 92.2 93.08 93.75
## p75 p100 hist
## -36.4 -26.9 ▁▅▆▃▇▁▁▁
## 93.99 94.77 ▁▁▅▅▁▇▁▂
# Variant of the reduced data that keeps euribor3m: only pdays, nr.employed
# and emp.var.rate are dropped.
EUbank3 <- bankraw2 %>%
  select(-pdays, -nr.employed, -emp.var.rate)
# Re-plot the correlations of bank3's numeric columns to double check that the
# correlated variables have been removed.
bank3 %>%
  keep(is.numeric) %>%
  cor() %>%
  corrplot(
    method = "square",
    type = "upper",
    order = "hclust",
    addCoef.col = "white",
    number.digits = 2,
    number.cex = 0.5,
    tl.srt = 45,
    tl.cex = 0.8
  )
# Same correlation check for EUbank3, which still contains euribor3m.
EUbank3 %>%
  keep(is.numeric) %>%
  cor() %>%
  corrplot(
    method = "square",
    type = "upper",
    order = "hclust",
    addCoef.col = "white",
    number.digits = 2,
    number.cex = 0.5,
    tl.srt = 45,
    tl.cex = 0.8
  )
# 1. Name target variable
#targetCatCat <- "Subscription"
# 2. Name explanatory variable
#explanatory <- bank3 %>% keep(is.factor) %>% colnames
# 3. Create function
# Horizontal proportion bar chart of one categorical column, filled by another.
#   df          data frame holding both columns
#   explanatory name (string) of the column that defines the bars
#   response    name (string) of the column mapped to the fill colour
# Returns a ggplot object; print it to draw the plot.
numCatCat <- function(df, explanatory, response) {
  # Columns are looked up by name through the .data pronoun; this replaces
  # aes_string(), which ggplot2 has deprecated.
  ggplot(data = df) +
    geom_bar(
      aes(x = .data[[explanatory]], fill = .data[[response]]),
      # position = "fill" stacks each bar to the same total height, so the
      # fill shows proportions rather than counts.
      position = "fill",
      alpha = 0.9
    ) +
    coord_flip() +
    # Label explicitly so the axis and legend show the bare column names.
    labs(x = explanatory, fill = response)
}
# # 3a. Example of working function above
# # numCatCat(bank3, explanatory = "education", response = "Subscription")
# 4. Create plot list for plot_grid function to reference
#plotlistCatCat <- lapply(explanatory, function(x) numCatCat(bank3, x, targetCatCat))
# 5. Grid of all categorical variables plotted against y = Subscription
#plot_grid(plotlist = plotlistCatCat)
# Show the first rows of the reduced data set.
head(bank3)
## job marital education default housing loan contact month
## 1 housemaid married basic.4y no no no telephone may
## 2 services married high.school unknown no no telephone may
## 3 services married high.school no yes no telephone may
## 4 admin. married basic.6y no no no telephone may
## 5 services married high.school no no yes telephone may
## 6 services married basic.9y unknown no no telephone may
## day_of_week campaign previous poutcome cons.price.idx cons.conf.idx
## 1 mon 1 0 nonexistent 93.994 -36.4
## 2 mon 1 0 nonexistent 93.994 -36.4
## 3 mon 1 0 nonexistent 93.994 -36.4
## 4 mon 1 0 nonexistent 93.994 -36.4
## 5 mon 1 0 nonexistent 93.994 -36.4
## 6 mon 1 0 nonexistent 93.994 -36.4
## Subscription
## 1 no
## 2 no
## 3 no
## 4 no
## 5 no
## 6 no
# Proportion bar chart of Subscription within each categorical predictor.
# print() is explicit because a plot built inside an iteration is not
# auto-printed. Note: `contact` is also a factor in bank3 but is not plotted.
walk(
  c(
    "job", "marital", "education", "default", "housing",
    "loan", "month", "day_of_week", "poutcome"
  ),
  function(predictor) {
    print(numCatCat(bank3, explanatory = predictor, response = "Subscription"))
  }
)
# Remove marital, housing, loan and day_of_week from both candidate data sets
# (with and without euribor3m), then summarise the first one.
bank4 <- bank3 %>%
  select(-marital, -housing, -loan, -day_of_week)
EUbank4 <- EUbank3 %>%
  select(-marital, -housing, -loan, -day_of_week)
summary(bank4)
## job education default
## admin. :10422 university.degree :12168 no :32588
## blue-collar: 9254 high.school : 9515 unknown: 8597
## technician : 6743 basic.9y : 6045 yes : 3
## services : 3969 professional.course: 5243
## management : 2924 basic.4y : 4176
## retired : 1720 basic.6y : 2292
## (Other) : 6156 (Other) : 1749
## contact month campaign previous
## cellular :26144 may :13769 Min. : 1.000 Min. :0.000
## telephone:15044 jul : 7174 1st Qu.: 1.000 1st Qu.:0.000
## aug : 6178 Median : 2.000 Median :0.000
## jun : 5318 Mean : 2.568 Mean :0.173
## nov : 4101 3rd Qu.: 3.000 3rd Qu.:0.000
## apr : 2632 Max. :56.000 Max. :7.000
## (Other): 2016
## poutcome cons.price.idx cons.conf.idx Subscription
## failure : 4252 Min. :92.20 Min. :-50.8 no :36548
## nonexistent:35563 1st Qu.:93.08 1st Qu.:-42.7 yes: 4640
## success : 1373 Median :93.75 Median :-41.8
## Mean :93.58 Mean :-40.5
## 3rd Qu.:93.99 3rd Qu.:-36.4
## Max. :94.77 Max. :-26.9
##
# Export the two reduced data sets as CSV files.
# The destination defaults to the original project folder; set the
# BANK_OUTPUT_DIR environment variable to write somewhere else.
outputDir <- Sys.getenv(
  "BANK_OUTPUT_DIR",
  unset = "/Users/Jaco/Desktop/SMU/Spring2020/DS_6372_Applied_Statistics/Project.2/Data"
)
# Fail early with a readable message instead of a low-level file-open error.
if (!dir.exists(outputDir)) {
  stop("Output directory does not exist: ", outputDir, call. = FALSE)
}
write.csv(bank4, file = file.path(outputDir, "simplelogic.csv"))
write.csv(EUbank4, file = file.path(outputDir, "simplelogicEU3.csv"))